from sklearn.feature_extraction.text import CountVectorizer

def en_count_demo():
    """Demonstrate count-based feature extraction on English text.

    Fits a ``CountVectorizer`` on two hard-coded sample sentences, then
    prints the type of the transformed result, its array form, and the
    feature names reported by the vectorizer.

    :return: None
    """
    corpus = [
        "life is short,i like python",
        "life is too too long,i dislike python",
    ]

    # Step 1: build the transformer with default settings. To exclude
    # specific words, a stop-word list can be passed instead, e.g.
    # CountVectorizer(stop_words=["is", "too"]).
    vectorizer = CountVectorizer()

    # Step 2: fit on the corpus and transform it in a single call.
    count_matrix = vectorizer.fit_transform(corpus)

    print("data_new 的类型：" , type(count_matrix))

    # Convert via toarray() so the counts are printed as a plain array.
    dense_counts = count_matrix.toarray()
    print("文本特征抽取的结果：\n", dense_counts)

    feature_names = vectorizer.get_feature_names_out()
    print("返回特征名字：\n", feature_names)

if __name__ == "__main__":
    # Demo 3: English text feature extraction. Runs only when this file is
    # executed as a script, not when it is imported.
    en_count_demo()